# -*- coding: utf-8 -*-

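# 1. Crawl Chejiahao (URL: https://chejiahao.autohome.com.cn/) (4)
# 2. Open the browser (8)
# 3. Fetch the home page source (8)
# 4. Get the title of any car article (8)
# 5. Get the view count (8)
# 6. Get the time (8)
# 7. Get the content (8)
# 8. Get the image links (8)
# 9. Get the comments (8)
# 10. Store in MongoDB (8)
# 11. Store in a txt file (8)
# 12. Store in a CSV file (8)
# 13. Add comments to the main parts of the code (8)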

import scrapy, json
from ..items import ChejiahaoItem


class CheSpider(scrapy.Spider):
    """Scrape one article from the Chejiahao home page plus its comments.

    The crawl is a chain of three requests:

    1. ``parse``    - home page: title, view count, time, image links and
       the link to the article detail page.
    2. ``parse_xq`` - detail page: article text.
    3. ``parse_pl`` - comment JSON API: comment texts; yields the item.

    Values collected in earlier steps travel to later callbacks through
    ``Request.meta`` under the keys ``title``, ``liulan``, ``shijian``,
    ``tupian`` and ``content``.
    """

    name = 'che'
    # allowed_domains = ['xxx']
    start_urls = ['https://chejiahao.autohome.com.cn/']

    # Id used both for the ``info-<id>`` element on the home page and for the
    # ``id`` query parameter of the comment API. It used to be hard-coded in
    # three places; as an attribute it can be overridden per run
    # (``scrapy crawl che -a article_id=...``) while the default keeps the
    # original behaviour.
    article_id = '3373418'

    # With the default ``article_id`` this expands to exactly the URL that was
    # previously hard-coded.
    comment_url_template = ('https://reply.autohome.com.cn/api/comments/show.json'
                            '?id={article_id}&page=1&appid=21&count=3&_=1551056189734')

    # Meta keys filled in by ``parse`` and forwarded by the later callbacks.
    _LISTING_FIELDS = ('title', 'liulan', 'shijian', 'tupian')

    def parse(self, response):
        """Extract listing data from the home page and follow the article link.

        Yields a request for the detail page handled by ``parse_xq``. Yields
        nothing (and logs a warning) when no article link is found.
        """
        info_xpath = "//*[@id='info-{}']/div[3]/div[2]".format(self.article_id)

        # extract_first() returns the default instead of raising IndexError
        # when the page layout changes and a selector matches nothing.
        title = response.xpath(
            "//a/div[@class='introduceTop text-overflow']/text()").extract_first(default='')
        liulan = response.xpath(info_xpath + "/span[1]/text()").extract_first(default='')
        shijian = response.xpath(info_xpath + "/span[4]/text()").extract_first(default='')
        tupian = response.xpath(
            "//div[@class='light-img']/div[1]/a/img[@class='indexlazy']/@src").extract()

        # NOTE(review): title and href come from the first matching article on
        # the page, while liulan/shijian come from the ``info-<article_id>``
        # element - confirm these refer to the same article.
        href = response.xpath("//div[@class='introduce fn-left']/a/@href").extract_first()
        if not href:
            self.logger.warning('No article link found on %s', response.url)
            return

        # urljoin copes with relative, absolute and scheme-relative links,
        # unlike plain string concatenation with the site root.
        yield scrapy.Request(url=response.urljoin(href), callback=self.parse_xq,
                             meta={'title': title, 'liulan': liulan, 'shijian': shijian,
                                   'tupian': tupian})

    def parse_xq(self, response):
        """Extract the article text from the detail page and request its comments.

        Yields a request for the comment API handled by ``parse_pl``.
        """
        meta = self._carried_fields(response, self._LISTING_FIELDS)
        meta['content'] = response.xpath(
            "//div[@class='introduce_content']/div[@class='chedan_describe']/text()"
        ).extract_first(default='').strip()

        comment_url = self.comment_url_template.format(article_id=self.article_id)
        yield scrapy.Request(url=comment_url, callback=self.parse_pl, meta=meta)

    def parse_pl(self, response):
        """Parse the comment API response and yield the finished item.

        The item is still yielded, with an empty ``pinglun``, when the
        response is not valid JSON or carries no ``commentlist``.
        """
        fields = self._carried_fields(response, self._LISTING_FIELDS + ('content',))

        try:
            data = json.loads(response.text)
        except ValueError:
            # Keep what was already scraped rather than losing the whole item.
            self.logger.warning('Comment API returned invalid JSON: %s', response.url)
            data = {}

        item = ChejiahaoItem()
        for key, value in fields.items():
            item[key] = value
        item['pinglun'] = self._join_comments(data)
        yield item

    @staticmethod
    def _carried_fields(response, keys):
        """Return only this spider's own values from ``response.meta``.

        ``response.meta`` also holds keys added by Scrapy itself, which must
        not be copied onto the next request.
        """
        return {key: response.meta[key] for key in keys}

    @staticmethod
    def _join_comments(data):
        """Concatenate every comment's ``RContent``, each followed by a comma.

        The trailing comma after the last comment is kept on purpose so the
        stored format stays the same as before.
        """
        comments = data.get('commentlist') if isinstance(data, dict) else None
        return ''.join((entry.get('RContent') or '') + ',' for entry in comments or [])
